Introduction

This question aims to determine whether there is a correlation between the number of prescriptions for salbutamol inhalers (including branded salbutamol inhalers) and the proportion of households without central heating in NHS Scotland healthboards.

Salbutamol inhalers are used to relieve the symptoms of respiratory conditons, such as asthma and chronic obstructive pulmonary disease. Patients may use their salbutamol inhaler when they experience symptoms, such as breathlessness, tightness in their chest, coughing and wheezing. These symptoms are usually exacerbated by the cold weather. Therefore, this questions aims to determine if there is a difference between prescriptions for salbutamol inhalers in areas with higher proportions of households with no central heating. We will look at the months which are the coldest in Scotland, the winter months, which for 2021/2022 were December, January and February.

pacman::p_load(tidyverse, janitor, here, readxl, magrittr, ggplot2, plotly, ggiraph, sf)
#load data for central heating
heating_data2022 <- read_excel(here("data", "table_2025-11-12_19-17-58.xlsx"))

#tidy dataset and change values in hb_area column to include NHS + healthboard name
tidy_heating2022 <- heating_data2022 %>% 
  rename(c("hb_area2019" = ...2,
           "occupied_households" = ...3,
           "no_central_heating" = ...4)) %>% 
  select(c(hb_area2019:no_central_heating)) %>%
  mutate(hb_area2019 = paste("NHS", hb_area2019)) %>% #add NHs to name of healthboard
  filter(!row_number() %in% c(1:9, 24:28)) #remove, since rows contain no data

#change data for no_central_heating and occupied_households into integers (from characters) and add a column that shows the proportion of households without central heating
tidy_heating2022 %<>% mutate(no_central_heating = as.numeric(no_central_heating),
         occupied_households = as.numeric(occupied_households)) %>% 
    mutate(percent_no_heating = (no_central_heating/occupied_households)*100) %>% #add column percent_no_heating 
  select(c(hb_area2019, percent_no_heating))

To find the dataset for census data regarding central heating, go to: https://www.scotlandscensus.gov.uk/webapi/jsf/tableView/tableView.xhtml

#function to clean datasets
clean_prescriptions <- function(df){
  df %>% 
    drop_na() %>%
    clean_names()
  }

data_dec2021 <- read_csv("https://www.opendata.nhs.scot/dataset/84393984-14e9-4b0d-a797-b288db64d088/resource/ad9e7b46-47fb-4d42-baad-f8e98e8f5936/download/pitc202112.csv") %>%
  clean_prescriptions()

data_jan2022 <- read_csv("https://www.opendata.nhs.scot/dataset/84393984-14e9-4b0d-a797-b288db64d088/resource/53a53d61-3b3b-4a12-888b-a788ce13db9c/download/pitc202201.csv") %>%
  clean_prescriptions()

data_feb2022 <- read_csv("https://www.opendata.nhs.scot/dataset/84393984-14e9-4b0d-a797-b288db64d088/resource/bd7aa5c9-d708-4d0b-9b28-a9d822c84e34/download/pitc202202.csv") %>%
  clean_prescriptions()

# links <- read_csv(here("file_links.csv")) %>% 
#   select(link) %>% 
#   pull
#function to filter for salbutamol inhalers and summarise inhaler prescriptions
salbut_relievers <- c("SALBUTAMOL", "VENTOLIN", "AIROMIR", "ASMALAL", "EASYHALER", "PULVINAL", "SALAMOL", "EASI-BREATHE", "SALBULIN") #names of salbutamol and brand equivalents

salbut_presc <- function(df){
  df %>% 
  filter(str_detect(bnf_item_description,
                    paste(salbut_relievers, collapse = "|"))) %>% 
  filter(!(str_detect(bnf_item_description,
                      "FORMOTEROL|BECLOMET|BUDESONIDE|FOBUMIX"))) %>% 
  select(c(hbt, bnf_item_description, paid_quantity)) %>% #prepare dataset
  group_by(hbt) %>% 
  summarise(prescribed_salbutamol = sum(paid_quantity)) %>% #add new column for sum
  subset(hbt != "SB0806") #removing row with code SB0806
}

#filtering datasets for each winter month in 2021/2022 and 2015/2016

salbut_dec2021 <- salbut_presc(data_dec2021) %>% 
  rename("salbutamol_dec21" = prescribed_salbutamol)

salbut_jan2022 <- salbut_presc(data_jan2022) %>% 
  rename("salbutamol_jan22" = prescribed_salbutamol)

salbut_feb2022 <- salbut_presc(data_feb2022) %>% 
  rename("salbutamol_feb22" = prescribed_salbutamol)

remove code for Scottish Ambulance Service (SB0806): https://www.opendata.nhs.scot/dataset/non-standard-geography-codes-and-labels/resource/0450a5a2-f600-4569-a9ae-5d6317141899

#join datasets for 2021/2022 together
join_prescription <- function(df1, df2){
  df1 %>% 
    full_join(df2, join_by("hbt" == "hbt"))
}
#join winter 2021/2022 data together
joined_salbut_2122 <- join_prescription(salbut_dec2021, salbut_jan2022)

joined_salbut_2122 <- join_prescription(joined_salbut_2122, salbut_feb2022)
#join with hb names
HB_names <- read_csv("https://www.opendata.nhs.scot/dataset/9f942fdb-e59e-44f5-b534-d6e17229cc7b/resource/652ff726-e676-4a20-abda-435b98dd7bdc/download/hb14_hb19.csv") %>% 
  clean_names()

hb_salbut_2122 <- joined_salbut_2122 %>% 
  full_join(HB_names, join_by("hbt" == "hb")) %>% 
  select(c(hbt, hb_name, starts_with("salbutamol"))) %>% 
  filter(!is.na(salbutamol_dec21)) #rows contain old area codes (now archived)

Area codes from row 15 to 18 are the old area codes from the 2014 healthboard boundaries, but which were changed in 2019. find link

#join population data
population_data <- read_csv("https://www.opendata.nhs.scot/dataset/7f010430-6ce1-4813-b25c-f7f335bdc4dc/resource/27a72cc8-d6d8-430c-8b4f-3109a9ceadb1/download/hb2019_pop_est_03102025.csv")

population_2122 <- population_data %>% 
  filter(Year == "2022" & Sex == "All") %>% 
  select(c(HB, AllAges)) %>% 
  filter(HB != "S92000003") #remove country code for Scotland

#functions for salbutamol per 100k 
# salbut_per100k <- function(x){
#   x*100000/population
# }

#join prescription data and population data and add columns for prescriptions per 100k
# salbut_population_2122 <- hb_salbut_2122 %>% 
#   full_join(population_2122, join_by("hbt" == "HB")) %>% 
#   rename("population" = AllAges) %>%   
#   mutate(across(c(starts_with("salbutamol")), salbut_per100k), .names = "per100k_{.col}")

salbut_population_2122 <- hb_salbut_2122 %>%
  full_join(population_2122, join_by("hbt" == "HB")) %>%
  rename("population" = AllAges) 

salbut_per100k <- salbut_population_2122%>%
  mutate(across(starts_with("salbutamol")),
         (pick(starts_with("salbutamol"))*100000)/population) #.names = "per100k_{.col}", add later 
  
averages_salbutamol <- salbut_population_2122 %>%
  rowwise() %>% 
  mutate(average_salbut = mean(c_across(starts_with("Salbutamol")))) %>% 
  mutate(average_salbut_per100k = (average_salbut*100000)/population)

S92000003 is the country code for Scotland, so this row tells us the population of Scotland in this year. So, we can remove it.

#join average_salbutamol with heating data

#join asthma_data_12_21 and heating data (2022)
final_salbutamol2122 <- averages_salbutamol %>% 
  full_join(tidy_heating2022, by = (c("hb_name" = "hb_area2019")))

#plotting data for % of households with heating in a healthboard and prescriptions of salbutamol

interactive_plot <- final_salbutamol2122 %>% 
  ggplot(aes(x = percent_no_heating, 
             y = average_salbut_per100k, 
             colour = hb_name)) +
  geom_point() +
  theme_bw() +
  labs( title = "Salbutamol prescription against % of households without heating in each Scottish healthboard",
        subtitle = "Winter 2021/2022",
        x = "no central heating (%)",
        y = "Salbutamol prescriptions per 100k people")

ggplotly(interactive_plot)
myplot <- final_salbutamol2122 %>% 
  ggplot(aes(x = percent_no_heating, 
             y = average_salbut_per100k, 
             colour = hb_name,
             tooltip = hb_name, 
             data_id = hb_name)) +
  geom_point_interactive( size = 2, hover_nearest = TRUE) +
  theme_bw() +
  labs( title = "Salbutamol prescription against % of households without heating in each Scottish healthboard",
        subtitle = "Winter 2021/2022",
        x = "no central heating (%)",
        y = "Salbutamol prescriptions per 100k people",
        fill = "Healthboard")

interactive_plot <- girafe(ggobj = myplot) %>% 
  girafe_options()
#plot table
salbut_per100k
## # A tibble: 14 × 6
##    hbt     hb_name salbutamol_dec21 salbutamol_jan22 salbutamol_feb22 population
##    <chr>   <chr>              <dbl>            <dbl>            <dbl>      <dbl>
##  1 S08000… NHS Ay…           14626.           11743.           11533.     365450
##  2 S08000… NHS Bo…            9652.            8317.            8057.     116820
##  3 S08000… NHS Du…           20090.           15871.           15962.     145770
##  4 S08000… NHS Fo…           13171.           10878.           10585.     302810
##  5 S08000… NHS Gr…            8591.            6710.            6445.     582300
##  6 S08000… NHS Hi…           10294.            7532.            7193.     323640
##  7 S08000… NHS Lo…           11361.            8944.            8346.     905800
##  8 S08000… NHS Or…            5597.            4485.            4131.      22030
##  9 S08000… NHS Sh…            7076.            5795.            5065.      23020
## 10 S08000… NHS We…           15253.           13449.           10995.      26120
## 11 S08000… NHS Fi…            7821.            6863.            6293.     371390
## 12 S08000… NHS Ta…            9339.            7213.            6847.     414270
## 13 S08000… NHS Gr…           23888.           19772.           18604.    1179200
## 14 S08000… NHS La…           12921.           10084.           10044.     668380
#visualise the percentage of no heating in each healthboard

scot_hb <- st_read(here("data", "SG_NHS_HealthBoards_2019", "SG_NHS_HealthBoards_2019.shp")) %>% 
  mutate(HBName = paste("NHS", HBName))
## Reading layer `SG_NHS_HealthBoards_2019' from data source 
##   `C:\Users\Admin\OneDrive\Documents\data_science\B253518\data\SG_NHS_HealthBoards_2019\SG_NHS_HealthBoards_2019.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 14 features and 4 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 5512.998 ymin: 530250.8 xmax: 470332 ymax: 1220302
## Projected CRS: OSGB36 / British National Grid
heating_map_data <- tidy_heating2022 %>% 
  full_join(scot_hb, join_by("hb_area2019" == "HBName"))

heating_plot <- heating_map_data %>% 
ggplot(aes(fill = percent_no_heating)) +
  geom_sf(aes(geometry = geometry)) +
  scale_fill_viridis_c(name = "% of households with no heating") +
  labs(title = "Distribution of households with no central heating",
       subtitle = "by Healthboards 2022")
heating_plot

heating_map <- girafe(ggobj = heating_plot)

plot(heating_map_data$percent_no_heating)

Reference List

https://www.nhs.uk/medicines/salbutamol-inhaler/about-salbutamol-inhalers/

https://www.nhs.uk/conditions/chronic-obstructive-pulmonary-disease-copd/

https://www.asthmaandlung.org.uk/living-with/cold-weather